library(partykit)
## Loading required package: grid
## Loading required package: libcoin
## Loading required package: mvtnorm
## Loading required package: rpart
library (dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library (ISLR)
library (ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library (rpart)
library (tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1 ✔ readr 1.1.1
## ✔ tibble 1.4.2 ✔ purrr 0.2.4
## ✔ tidyr 0.8.0 ✔ stringr 1.2.0
## ✔ ggplot2 2.2.1 ✔ forcats 0.2.0
## ── Conflicts ───────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library (broom)
# All-UC admissions extract; the header row supplies the column names.
file_path <- "~/compscix415-DataSciencePrinciples/DataSciencePrinciples/data/admit_data_all_uc.csv"
admit_alldata <- read_csv(file_path, col_names = TRUE)
## Parsed with column specification:
## cols(
## `School Name` = col_character(),
## City = col_character(),
## `County/State/ Territory` = col_character(),
## `Measure Names` = col_character(),
## School = col_character(),
## Race = col_character(),
## `Measure Values` = col_integer()
## )
# Berkeley campus extract. Its header repeats the name `School`, so readr
# renames the second occurrence on read (see the warning in the output).
file_path <- "~/compscix415-DataSciencePrinciples/DataSciencePrinciples/data/Berkeley.csv"
Berkeley <- read_csv(file_path)
## Warning: Duplicated column names deduplicated: 'School' => 'School_1' [5]
## Parsed with column specification:
## cols(
## School = col_character(),
## City = col_character(),
## `County/State/ Territory` = col_character(),
## `Measure Names` = col_character(),
## School_1 = col_character(),
## `Uad Uc Ethn 6 Cat` = col_character(),
## `Measure Values` = col_integer()
## )
# Davis campus extract; the header row supplies the column names.
file_path <- "~/compscix415-DataSciencePrinciples/DataSciencePrinciples/data/Davis.csv"
Davis <- read_csv(file_path, col_names = TRUE)
## Parsed with column specification:
## cols(
## Calculation1 = col_character(),
## City = col_character(),
## `County/State/ Territory` = col_character(),
## `Measure Names` = col_character(),
## School = col_character(),
## `Uad Uc Ethn 6 Cat` = col_character(),
## `Measure Values` = col_integer()
## )
# Irvine campus extract; the header row supplies the column names.
file_path <- "~/compscix415-DataSciencePrinciples/DataSciencePrinciples/data/Irvine.csv"
Irvine <- read_csv(file_path, col_names = TRUE)
## Parsed with column specification:
## cols(
## Calculation1 = col_character(),
## City = col_character(),
## `County/State/ Territory` = col_character(),
## `Measure Names` = col_character(),
## School = col_character(),
## `Uad Uc Ethn 6 Cat` = col_character(),
## `Measure Values` = col_integer()
## )
# Los Angeles campus extract; the header row supplies the column names.
file_path <- "~/compscix415-DataSciencePrinciples/DataSciencePrinciples/data/LosAngeles_all.csv"
LA_all <- read_csv(file_path, col_names = TRUE)
## Parsed with column specification:
## cols(
## Calculation1 = col_character(),
## City = col_character(),
## `County/State/ Territory` = col_character(),
## `Measure Names` = col_character(),
## School = col_character(),
## `Uad Uc Ethn 6 Cat` = col_character(),
## `Measure Values` = col_integer()
## )
# Merced campus extract; the header row supplies the column names.
file_path <- "~/compscix415-DataSciencePrinciples/DataSciencePrinciples/data/Merced.csv"
Merced <- read_csv(file_path, col_names = TRUE)
## Parsed with column specification:
## cols(
## Calculation1 = col_character(),
## City = col_character(),
## `County/State/ Territory` = col_character(),
## `Measure Names` = col_character(),
## School = col_character(),
## `Uad Uc Ethn 6 Cat` = col_character(),
## `Measure Values` = col_integer()
## )
# Riverside campus extract; the header row supplies the column names.
file_path <- "~/compscix415-DataSciencePrinciples/DataSciencePrinciples/data/Riverside.csv"
Riverside <- read_csv(file_path, col_names = TRUE)
## Parsed with column specification:
## cols(
## Calculation1 = col_character(),
## City = col_character(),
## `County/State/ Territory` = col_character(),
## `Measure Names` = col_character(),
## School = col_character(),
## `Uad Uc Ethn 6 Cat` = col_character(),
## `Measure Values` = col_integer()
## )
# San Diego campus extract; the header row supplies the column names.
file_path <- "~/compscix415-DataSciencePrinciples/DataSciencePrinciples/data/SanDiego.csv"
SanDiego <- read_csv(file_path, col_names = TRUE)
## Parsed with column specification:
## cols(
## Calculation1 = col_character(),
## City = col_character(),
## `County/State/ Territory` = col_character(),
## `Measure Names` = col_character(),
## School = col_character(),
## `Uad Uc Ethn 6 Cat` = col_character(),
## `Measure Values` = col_integer()
## )
# Santa Barbara campus extract; the header row supplies the column names.
file_path <- "~/compscix415-DataSciencePrinciples/DataSciencePrinciples/data/SantaBarb.csv"
SantaBarb <- read_csv(file_path, col_names = TRUE)
## Parsed with column specification:
## cols(
## Calculation1 = col_character(),
## City = col_character(),
## `County/State/ Territory` = col_character(),
## `Measure Names` = col_character(),
## School = col_character(),
## `Uad Uc Ethn 6 Cat` = col_character(),
## `Measure Values` = col_integer()
## )
# Santa Cruz campus extract; the header row supplies the column names.
file_path <- "~/compscix415-DataSciencePrinciples/DataSciencePrinciples/data/SantaCruz.csv"
SantaCruz <- read_csv(file_path, col_names = TRUE)
## Parsed with column specification:
## cols(
## Calculation1 = col_character(),
## City = col_character(),
## `County/State/ Territory` = col_character(),
## `Measure Names` = col_character(),
## School = col_character(),
## `Uad Uc Ethn 6 Cat` = col_character(),
## `Measure Values` = col_integer()
## )
# Show the raw all-campus table: one row per school / race / measure name.
admit_alldata
## # A tibble: 12,876 x 7
## `School Name` City `County/State/ T… `Measure Names` School Race
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 A B MILLER HIG… Fonta… San Bernardino enr A B MI… Domes…
## 2 A B MILLER HIG… Fonta… San Bernardino adm A B MI… Domes…
## 3 A B MILLER HIG… Fonta… San Bernardino app A B MI… Domes…
## 4 A B MILLER HIG… Fonta… San Bernardino enr A B MI… Hispa…
## 5 A B MILLER HIG… Fonta… San Bernardino adm A B MI… Hispa…
## 6 A B MILLER HIG… Fonta… San Bernardino app A B MI… Hispa…
## 7 A B MILLER HIG… Fonta… San Bernardino enr A B MI… All
## 8 A B MILLER HIG… Fonta… San Bernardino adm A B MI… All
## 9 A B MILLER HIG… Fonta… San Bernardino app A B MI… All
## 10 ABRAHAM LINCOL… Los A… Los Angeles enr ABRAHA… Asian
## # ... with 12,866 more rows, and 1 more variable: `Measure Values` <int>
# Show the raw Berkeley table, including the deduplicated second School column.
Berkeley
## # A tibble: 534 x 7
## School City `County/State/ … `Measure Names` School_1 `Uad Uc Ethn 6 …
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 ALAME… Alam… Alameda enr ALAMEDA… White
## 2 ALAME… Alam… Alameda adm ALAMEDA… White
## 3 ALAME… Alam… Alameda app ALAMEDA… White
## 4 ALAME… Alam… Alameda enr ALAMEDA… Asian
## 5 ALAME… Alam… Alameda adm ALAMEDA… Asian
## 6 ALAME… Alam… Alameda app ALAMEDA… Asian
## 7 ALAME… Alam… Alameda enr ALAMEDA… All
## 8 ALAME… Alam… Alameda adm ALAMEDA… All
## 9 ALAME… Alam… Alameda app ALAMEDA… All
## 10 ALAME… Alam… Alameda enr ALAMEDA… White
## # ... with 524 more rows, and 1 more variable: `Measure Values` <int>
# Reshape every table from long to wide: the values of `Measure Names`
# (adm, app, enr) become columns filled from `Measure Values`. Then add the
# admit rate (adm / app), a campus label and the reporting year.
admit_data <- admit_alldata %>%
  spread(key = "Measure Names", value = "Measure Values") %>%
  mutate(AdmitStat = adm / app, Campus = "All", Year = 2017)

Berkeley_admit <- Berkeley %>%
  spread(key = "Measure Names", value = "Measure Values") %>%
  mutate(AdmitStat = adm / app, Campus = "Berkeley", Year = 2017)

Davis_admit <- Davis %>%
  spread(key = "Measure Names", value = "Measure Values") %>%
  mutate(AdmitStat = adm / app, Campus = "Davis", Year = 2017)

Irvine_admit <- Irvine %>%
  spread(key = "Measure Names", value = "Measure Values") %>%
  mutate(AdmitStat = adm / app, Campus = "Irvine", Year = 2017)

LA_all_admit <- LA_all %>%
  spread(key = "Measure Names", value = "Measure Values") %>%
  mutate(AdmitStat = adm / app, Campus = "LosAng", Year = 2017)

Merced_admit <- Merced %>%
  spread(key = "Measure Names", value = "Measure Values") %>%
  mutate(AdmitStat = adm / app, Campus = "Merced", Year = 2017)

Riverside_admit <- Riverside %>%
  spread(key = "Measure Names", value = "Measure Values") %>%
  mutate(AdmitStat = adm / app, Campus = "Riverside", Year = 2017)

SanDiego_admit <- SanDiego %>%
  spread(key = "Measure Names", value = "Measure Values") %>%
  mutate(AdmitStat = adm / app, Campus = "SanDiego", Year = 2017)

SantaBarb_admit <- SantaBarb %>%
  spread(key = "Measure Names", value = "Measure Values") %>%
  mutate(AdmitStat = adm / app, Campus = "SantaBarb", Year = 2017)

SantaCruz_admit <- SantaCruz %>%
  spread(key = "Measure Names", value = "Measure Values") %>%
  mutate(AdmitStat = adm / app, Campus = "SantaCruz", Year = 2017)

# Show the widened all-campus table.
admit_data
## # A tibble: 4,292 x 11
## `School Name` City `County/State/ … School Race adm app enr
## <chr> <chr> <chr> <chr> <chr> <int> <int> <int>
## 1 A B MILLER HIG… Fonta… San Bernardino A B MI… Dome… 28 30 13
## 2 A B MILLER HIG… Fonta… San Bernardino A B MI… Hisp… 36 52 18
## 3 A B MILLER HIG… Fonta… San Bernardino A B MI… All 68 88 34
## 4 ABRAHAM LINCOL… Los A… Los Angeles ABRAHA… Asian 25 36 22
## 5 ABRAHAM LINCOL… Los A… Los Angeles ABRAHA… Hisp… 10 21 7
## 6 ABRAHAM LINCOL… Los A… Los Angeles ABRAHA… All 39 61 33
## 7 ABRAHAM LINCOL… San F… San Francisco ABRAHA… White 7 12 6
## 8 ABRAHAM LINCOL… San F… San Francisco ABRAHA… Asian 105 193 73
## 9 ABRAHAM LINCOL… San F… San Francisco ABRAHA… Hisp… 14 24 7
## 10 ABRAHAM LINCOL… San F… San Francisco ABRAHA… Afri… NA 6 NA
## # ... with 4,282 more rows, and 3 more variables: AdmitStat <dbl>,
## # Campus <chr>, Year <dbl>
# Show the widened Berkeley table (adm/app/enr plus AdmitStat, Campus, Year).
Berkeley_admit
## # A tibble: 178 x 11
## School City `County/State/ … School_1 `Uad Uc Ethn 6 … adm app
## <chr> <chr> <chr> <chr> <chr> <int> <int>
## 1 ALAMEDA… Alame… Alameda ALAMEDA … All 7 19
## 2 ALAMEDA… Alame… Alameda ALAMEDA … Asian NA 5
## 3 ALAMEDA… Alame… Alameda ALAMEDA … White 4 7
## 4 ALAMEDA… Alame… Alameda ALAMEDA … African American NA 9
## 5 ALAMEDA… Alame… Alameda ALAMEDA … All 29 125
## 6 ALAMEDA… Alame… Alameda ALAMEDA … Asian 15 77
## 7 ALAMEDA… Alame… Alameda ALAMEDA … Hispanic/ Latino NA 5
## 8 ALAMEDA… Alame… Alameda ALAMEDA … White 9 28
## 9 ALAMEDA… Alame… Alameda ALAMEDA … All 5 34
## 10 ALAMEDA… Alame… Alameda ALAMEDA … Asian 5 24
## # ... with 168 more rows, and 4 more variables: enr <int>,
## # AdmitStat <dbl>, Campus <chr>, Year <dbl>
# Show the widened Davis table (adm/app/enr plus AdmitStat, Campus, Year).
Davis_admit
## # A tibble: 182 x 11
## Calculation1 City `County/State/ … School `Uad Uc Ethn 6 … adm app
## <chr> <chr> <chr> <chr> <chr> <int> <int>
## 1 ALAMEDA COM… Alam… Alameda ALAME… All 5 11
## 2 ALAMEDA COM… Alam… Alameda ALAME… Asian 3 5
## 3 ALAMEDA HIG… Alam… Alameda ALAME… African American NA 9
## 4 ALAMEDA HIG… Alam… Alameda ALAME… All 62 157
## 5 ALAMEDA HIG… Alam… Alameda ALAME… Asian 37 97
## 6 ALAMEDA HIG… Alam… Alameda ALAME… Hispanic/ Latino 4 11
## 7 ALAMEDA HIG… Alam… Alameda ALAME… White 17 34
## 8 ALAMEDA SCI… Alam… Alameda ALAME… All 13 35
## 9 ALAMEDA SCI… Alam… Alameda ALAME… Asian 10 24
## 10 ALAMEDA SCI… Alam… Alameda ALAME… Hispanic/ Latino NA 5
## # ... with 172 more rows, and 4 more variables: enr <int>,
## # AdmitStat <dbl>, Campus <chr>, Year <dbl>
# Show the widened Irvine table (adm/app/enr plus AdmitStat, Campus, Year).
Irvine_admit
## # A tibble: 146 x 11
## Calculation1 City `County/State/ … School `Uad Uc Ethn 6 … adm app
## <chr> <chr> <chr> <chr> <chr> <int> <int>
## 1 ALAMEDA COM… Alam… Alameda ALAME… All 5 8
## 2 ALAMEDA COM… Alam… Alameda ALAME… Asian 4 5
## 3 ALAMEDA HIG… Alam… Alameda ALAME… African American NA 6
## 4 ALAMEDA HIG… Alam… Alameda ALAME… All 54 128
## 5 ALAMEDA HIG… Alam… Alameda ALAME… Asian 41 95
## 6 ALAMEDA HIG… Alam… Alameda ALAME… White 5 18
## 7 ALAMEDA SCI… Alam… Alameda ALAME… All 12 30
## 8 ALAMEDA SCI… Alam… Alameda ALAME… Asian 10 22
## 9 ALBANY HIGH… Alba… Alameda ALBAN… All 24 68
## 10 ALBANY HIGH… Alba… Alameda ALBAN… Asian 20 42
## # ... with 136 more rows, and 4 more variables: enr <int>,
## # AdmitStat <dbl>, Campus <chr>, Year <dbl>
# Show the widened Los Angeles table (all counties, not yet filtered).
LA_all_admit
## # A tibble: 3,445 x 11
## Calculation1 City `County/State/ … School `Uad Uc Ethn 6 … adm app
## <chr> <chr> <chr> <chr> <chr> <int> <int>
## 1 A B MILLER … Font… San Bernardino A B M… Domestic Unknown NA 9
## 2 A B MILLER … Font… San Bernardino A B M… Hispanic/ Latino 3 27
## 3 A B MILLER … Font… San Bernardino A B M… All 4 39
## 4 ABRAHAM LIN… Los … Los Angeles ABRAH… Asian 3 26
## 5 ABRAHAM LIN… Los … Los Angeles ABRAH… Hispanic/ Latino 3 10
## 6 ABRAHAM LIN… Los … Los Angeles ABRAH… All 7 39
## 7 ABRAHAM LIN… San … San Francisco ABRAH… White NA 9
## 8 ABRAHAM LIN… San … San Francisco ABRAH… Asian 9 101
## 9 ABRAHAM LIN… San … San Francisco ABRAH… Hispanic/ Latino NA 7
## 10 ABRAHAM LIN… San … San Francisco ABRAH… All 11 125
## # ... with 3,435 more rows, and 4 more variables: enr <int>,
## # AdmitStat <dbl>, Campus <chr>, Year <dbl>
# Show the widened Merced table (adm/app/enr plus AdmitStat, Campus, Year).
Merced_admit
## # A tibble: 119 x 11
## Calculation1 City `County/State/ … School `Uad Uc Ethn 6 … adm app
## <chr> <chr> <chr> <chr> <chr> <int> <int>
## 1 ALAMEDA COM… Alam… Alameda ALAME… All 5 5
## 2 ALAMEDA HIG… Alam… Alameda ALAME… All 41 48
## 3 ALAMEDA HIG… Alam… Alameda ALAME… Asian 27 33
## 4 ALAMEDA HIG… Alam… Alameda ALAME… White 9 9
## 5 ALAMEDA SCI… Alam… Alameda ALAME… All 7 9
## 6 ALBANY HIGH… Alba… Alameda ALBAN… All 12 15
## 7 ALBANY HIGH… Alba… Alameda ALBAN… Asian 9 10
## 8 AMADOR VALL… Plea… Alameda AMADO… All 45 55
## 9 AMADOR VALL… Plea… Alameda AMADO… Asian 26 32
## 10 AMADOR VALL… Plea… Alameda AMADO… Hispanic/ Latino 6 7
## # ... with 109 more rows, and 4 more variables: enr <int>,
## # AdmitStat <dbl>, Campus <chr>, Year <dbl>
# Show the widened Riverside table (adm/app/enr plus AdmitStat, Campus, Year).
Riverside_admit
## # A tibble: 115 x 11
## Calculation1 City `County/State/ … School `Uad Uc Ethn 6 … adm app
## <chr> <chr> <chr> <chr> <chr> <int> <int>
## 1 ALAMEDA HIG… Alam… Alameda ALAME… All 55 72
## 2 ALAMEDA HIG… Alam… Alameda ALAME… Asian 38 51
## 3 ALAMEDA HIG… Alam… Alameda ALAME… White 9 11
## 4 ALAMEDA SCI… Alam… Alameda ALAME… All 5 8
## 5 ALAMEDA SCI… Alam… Alameda ALAME… Asian 3 5
## 6 ALBANY HIGH… Alba… Alameda ALBAN… All 17 23
## 7 ALBANY HIGH… Alba… Alameda ALBAN… Asian 9 12
## 8 ALBANY HIGH… Alba… Alameda ALBAN… White 4 6
## 9 AMADOR VALL… Plea… Alameda AMADO… All 76 88
## 10 AMADOR VALL… Plea… Alameda AMADO… Asian 49 55
## # ... with 105 more rows, and 4 more variables: enr <int>,
## # AdmitStat <dbl>, Campus <chr>, Year <dbl>
# Show the widened San Diego table (adm/app/enr plus AdmitStat, Campus, Year).
SanDiego_admit
## # A tibble: 143 x 11
## Calculation1 City `County/State/ … School `Uad Uc Ethn 6 … adm app
## <chr> <chr> <chr> <chr> <chr> <int> <int>
## 1 ALAMEDA COM… Alam… Alameda ALAME… All 5 12
## 2 ALAMEDA HIG… Alam… Alameda ALAME… African American NA 7
## 3 ALAMEDA HIG… Alam… Alameda ALAME… All 53 148
## 4 ALAMEDA HIG… Alam… Alameda ALAME… Asian 33 95
## 5 ALAMEDA HIG… Alam… Alameda ALAME… Domestic Unknown NA 5
## 6 ALAMEDA HIG… Alam… Alameda ALAME… Hispanic/ Latino 3 8
## 7 ALAMEDA HIG… Alam… Alameda ALAME… White 14 29
## 8 ALAMEDA SCI… Alam… Alameda ALAME… All 5 28
## 9 ALAMEDA SCI… Alam… Alameda ALAME… Asian 4 21
## 10 ALBANY HIGH… Alba… Alameda ALBAN… All 38 87
## # ... with 133 more rows, and 4 more variables: enr <int>,
## # AdmitStat <dbl>, Campus <chr>, Year <dbl>
# Show the widened Santa Barbara table (adm/app/enr plus AdmitStat, Campus, Year).
SantaBarb_admit
## # A tibble: 149 x 11
## Calculation1 City `County/State/ … School `Uad Uc Ethn 6 … adm app
## <chr> <chr> <chr> <chr> <chr> <int> <int>
## 1 ALAMEDA COM… Alam… Alameda ALAME… All 3 13
## 2 ALAMEDA COM… Alam… Alameda ALAME… White NA 6
## 3 ALAMEDA HIG… Alam… Alameda ALAME… African American NA 8
## 4 ALAMEDA HIG… Alam… Alameda ALAME… All 47 144
## 5 ALAMEDA HIG… Alam… Alameda ALAME… Asian 25 84
## 6 ALAMEDA HIG… Alam… Alameda ALAME… Domestic Unknown NA 6
## 7 ALAMEDA HIG… Alam… Alameda ALAME… Hispanic/ Latino 4 8
## 8 ALAMEDA HIG… Alam… Alameda ALAME… White 13 34
## 9 ALAMEDA SCI… Alam… Alameda ALAME… All 5 21
## 10 ALAMEDA SCI… Alam… Alameda ALAME… Asian 3 14
## # ... with 139 more rows, and 4 more variables: enr <int>,
## # AdmitStat <dbl>, Campus <chr>, Year <dbl>
# Show the widened Santa Cruz table (adm/app/enr plus AdmitStat, Campus, Year).
SantaCruz_admit
## # A tibble: 167 x 11
## Calculation1 City `County/State/ … School `Uad Uc Ethn 6 … adm app
## <chr> <chr> <chr> <chr> <chr> <int> <int>
## 1 ALAMEDA COM… Alam… Alameda ALAME… All 9 18
## 2 ALAMEDA COM… Alam… Alameda ALAME… Asian NA 6
## 3 ALAMEDA HIG… Alam… Alameda ALAME… African American NA 6
## 4 ALAMEDA HIG… Alam… Alameda ALAME… All 74 133
## 5 ALAMEDA HIG… Alam… Alameda ALAME… Asian 43 80
## 6 ALAMEDA HIG… Alam… Alameda ALAME… Hispanic/ Latino 4 9
## 7 ALAMEDA HIG… Alam… Alameda ALAME… White 22 32
## 8 ALAMEDA SCI… Alam… Alameda ALAME… All 13 27
## 9 ALAMEDA SCI… Alam… Alameda ALAME… Asian 9 17
## 10 ALBANY HIGH… Alba… Alameda ALBAN… All 69 106
## # ... with 157 more rows, and 4 more variables: enr <int>,
## # AdmitStat <dbl>, Campus <chr>, Year <dbl>
# Shorten the school-name (column 1) and county (column 3) headers.
names(admit_data)[c(1, 3)] <- c("SchoolID", "County")
# Keep only the Alameda County rows of the all-campus table.
All_campus <- filter(admit_data, County == "Alameda")
##admit_data[10] <- NULL
All_campus
## # A tibble: 207 x 11
## SchoolID City County School Race adm app enr AdmitStat Campus
## <chr> <chr> <chr> <chr> <chr> <int> <int> <int> <dbl> <chr>
## 1 ALAMEDA … Alame… Alame… ALAME… White 6 7 6 0.857 All
## 2 ALAMEDA … Alame… Alame… ALAME… Asian 8 9 3 0.889 All
## 3 ALAMEDA … Alame… Alame… ALAME… Hisp… 4 5 NA 0.800 All
## 4 ALAMEDA … Alame… Alame… ALAME… All 21 25 11 0.840 All
## 5 ALAMEDA … Alame… Alame… ALAME… Dome… 4 6 NA 0.667 All
## 6 ALAMEDA … Alame… Alame… ALAME… White 37 50 22 0.740 All
## 7 ALAMEDA … Alame… Alame… ALAME… Asian 100 124 71 0.806 All
## 8 ALAMEDA … Alame… Alame… ALAME… Hisp… 8 12 5 0.667 All
## 9 ALAMEDA … Alame… Alame… ALAME… Afri… 8 14 5 0.571 All
## 10 ALAMEDA … Alame… Alame… ALAME… All 161 210 107 0.767 All
## # ... with 197 more rows, and 1 more variable: Year <dbl>
# Standardise the identifying columns by position:
# 1 = SchoolID, 3 = County, 4 = School, 5 = Race.
names(Berkeley_admit)[c(1, 3, 4, 5)] <- c("SchoolID", "County", "School", "Race")
Berkeley_admit
## # A tibble: 178 x 11
## SchoolID City County School Race adm app enr AdmitStat Campus
## <chr> <chr> <chr> <chr> <chr> <int> <int> <int> <dbl> <chr>
## 1 ALAMEDA … Alame… Alame… ALAME… All 7 19 5 0.368 Berke…
## 2 ALAMEDA … Alame… Alame… ALAME… Asian NA 5 NA NA Berke…
## 3 ALAMEDA … Alame… Alame… ALAME… White 4 7 4 0.571 Berke…
## 4 ALAMEDA … Alame… Alame… ALAME… Afri… NA 9 NA NA Berke…
## 5 ALAMEDA … Alame… Alame… ALAME… All 29 125 13 0.232 Berke…
## 6 ALAMEDA … Alame… Alame… ALAME… Asian 15 77 8 0.195 Berke…
## 7 ALAMEDA … Alame… Alame… ALAME… Hisp… NA 5 NA NA Berke…
## 8 ALAMEDA … Alame… Alame… ALAME… White 9 28 3 0.321 Berke…
## 9 ALAMEDA … Alame… Alame… ALAME… All 5 34 4 0.147 Berke…
## 10 ALAMEDA … Alame… Alame… ALAME… Asian 5 24 4 0.208 Berke…
## # ... with 168 more rows, and 1 more variable: Year <dbl>
# Standardise the identifying columns by position:
# 1 = SchoolID, 3 = County, 4 = School, 5 = Race.
names(Davis_admit)[c(1, 3, 4, 5)] <- c("SchoolID", "County", "School", "Race")
Davis_admit
## # A tibble: 182 x 11
## SchoolID City County School Race adm app enr AdmitStat Campus
## <chr> <chr> <chr> <chr> <chr> <int> <int> <int> <dbl> <chr>
## 1 ALAMEDA … Alame… Alame… ALAME… All 5 11 NA 0.455 Davis
## 2 ALAMEDA … Alame… Alame… ALAME… Asian 3 5 NA 0.600 Davis
## 3 ALAMEDA … Alame… Alame… ALAME… Afri… NA 9 NA NA Davis
## 4 ALAMEDA … Alame… Alame… ALAME… All 62 157 18 0.395 Davis
## 5 ALAMEDA … Alame… Alame… ALAME… Asian 37 97 14 0.381 Davis
## 6 ALAMEDA … Alame… Alame… ALAME… Hisp… 4 11 NA 0.364 Davis
## 7 ALAMEDA … Alame… Alame… ALAME… White 17 34 4 0.500 Davis
## 8 ALAMEDA … Alame… Alame… ALAME… All 13 35 6 0.371 Davis
## 9 ALAMEDA … Alame… Alame… ALAME… Asian 10 24 5 0.417 Davis
## 10 ALAMEDA … Alame… Alame… ALAME… Hisp… NA 5 NA NA Davis
## # ... with 172 more rows, and 1 more variable: Year <dbl>
# Standardise the identifying columns by position:
# 1 = SchoolID, 3 = County, 4 = School, 5 = Race.
names(Irvine_admit)[c(1, 3, 4, 5)] <- c("SchoolID", "County", "School", "Race")
Irvine_admit
## # A tibble: 146 x 11
## SchoolID City County School Race adm app enr AdmitStat Campus
## <chr> <chr> <chr> <chr> <chr> <int> <int> <int> <dbl> <chr>
## 1 ALAMEDA … Alame… Alame… ALAME… All 5 8 NA 0.625 Irvine
## 2 ALAMEDA … Alame… Alame… ALAME… Asian 4 5 NA 0.800 Irvine
## 3 ALAMEDA … Alame… Alame… ALAME… Afri… NA 6 NA NA Irvine
## 4 ALAMEDA … Alame… Alame… ALAME… All 54 128 15 0.422 Irvine
## 5 ALAMEDA … Alame… Alame… ALAME… Asian 41 95 14 0.432 Irvine
## 6 ALAMEDA … Alame… Alame… ALAME… White 5 18 NA 0.278 Irvine
## 7 ALAMEDA … Alame… Alame… ALAME… All 12 30 3 0.400 Irvine
## 8 ALAMEDA … Alame… Alame… ALAME… Asian 10 22 3 0.455 Irvine
## 9 ALBANY H… Albany Alame… ALBAN… All 24 68 NA 0.353 Irvine
## 10 ALBANY H… Albany Alame… ALBAN… Asian 20 42 NA 0.476 Irvine
## # ... with 136 more rows, and 1 more variable: Year <dbl>
# Standardise the identifying columns by position:
# 1 = SchoolID, 3 = County, 4 = School, 5 = Race.
names(LA_all_admit)[c(1, 3, 4, 5)] <- c("SchoolID", "County", "School", "Race")
LA_all_admit
## # A tibble: 3,445 x 11
## SchoolID City County School Race adm app enr AdmitStat Campus
## <chr> <chr> <chr> <chr> <chr> <int> <int> <int> <dbl> <chr>
## 1 A B MILL… Fonta… San B… A B M… Dome… NA 9 NA NA LosAng
## 2 A B MILL… Fonta… San B… A B M… Hisp… 3 27 NA 0.111 LosAng
## 3 A B MILL… Fonta… San B… A B M… All 4 39 NA 0.103 LosAng
## 4 ABRAHAM … Los A… Los A… ABRAH… Asian 3 26 3 0.115 LosAng
## 5 ABRAHAM … Los A… Los A… ABRAH… Hisp… 3 10 3 0.300 LosAng
## 6 ABRAHAM … Los A… Los A… ABRAH… All 7 39 7 0.179 LosAng
## 7 ABRAHAM … San F… San F… ABRAH… White NA 9 NA NA LosAng
## 8 ABRAHAM … San F… San F… ABRAH… Asian 9 101 6 0.0891 LosAng
## 9 ABRAHAM … San F… San F… ABRAH… Hisp… NA 7 NA NA LosAng
## 10 ABRAHAM … San F… San F… ABRAH… All 11 125 7 0.0880 LosAng
## # ... with 3,435 more rows, and 1 more variable: Year <dbl>
# Keep only the Alameda County rows of the Los Angeles campus table.
LA_admit <- filter(LA_all_admit, County == "Alameda")
LA_admit
## # A tibble: 164 x 11
## SchoolID City County School Race adm app enr AdmitStat Campus
## <chr> <chr> <chr> <chr> <chr> <int> <int> <int> <dbl> <chr>
## 1 ALAMEDA … Alame… Alame… ALAME… White NA 5 NA NA LosAng
## 2 ALAMEDA … Alame… Alame… ALAME… Asian NA 5 NA NA LosAng
## 3 ALAMEDA … Alame… Alame… ALAME… All NA 13 NA NA LosAng
## 4 ALAMEDA … Alame… Alame… ALAME… White 3 21 NA 0.143 LosAng
## 5 ALAMEDA … Alame… Alame… ALAME… Asian 15 85 7 0.176 LosAng
## 6 ALAMEDA … Alame… Alame… ALAME… Afri… NA 8 NA NA LosAng
## 7 ALAMEDA … Alame… Alame… ALAME… All 22 126 11 0.175 LosAng
## 8 ALAMEDA … Alame… Alame… ALAME… Asian NA 17 NA NA LosAng
## 9 ALAMEDA … Alame… Alame… ALAME… Hisp… NA 5 NA NA LosAng
## 10 ALAMEDA … Alame… Alame… ALAME… All NA 25 NA NA LosAng
## # ... with 154 more rows, and 1 more variable: Year <dbl>
# Standardise the identifying columns by position:
# 1 = SchoolID, 3 = County, 4 = School, 5 = Race.
names(Merced_admit)[c(1, 3, 4, 5)] <- c("SchoolID", "County", "School", "Race")
Merced_admit
## # A tibble: 119 x 11
## SchoolID City County School Race adm app enr AdmitStat Campus
## <chr> <chr> <chr> <chr> <chr> <int> <int> <int> <dbl> <chr>
## 1 ALAMEDA … Alame… Alame… ALAME… All 5 5 NA 1.00 Merced
## 2 ALAMEDA … Alame… Alame… ALAME… All 41 48 6 0.854 Merced
## 3 ALAMEDA … Alame… Alame… ALAME… Asian 27 33 3 0.818 Merced
## 4 ALAMEDA … Alame… Alame… ALAME… White 9 9 NA 1.00 Merced
## 5 ALAMEDA … Alame… Alame… ALAME… All 7 9 NA 0.778 Merced
## 6 ALBANY H… Albany Alame… ALBAN… All 12 15 NA 0.800 Merced
## 7 ALBANY H… Albany Alame… ALBAN… Asian 9 10 NA 0.900 Merced
## 8 AMADOR V… Pleas… Alame… AMADO… All 45 55 6 0.818 Merced
## 9 AMADOR V… Pleas… Alame… AMADO… Asian 26 32 NA 0.812 Merced
## 10 AMADOR V… Pleas… Alame… AMADO… Hisp… 6 7 NA 0.857 Merced
## # ... with 109 more rows, and 1 more variable: Year <dbl>
# Standardise the identifying columns by position:
# 1 = SchoolID, 3 = County, 4 = School, 5 = Race.
names(Riverside_admit)[c(1, 3, 4, 5)] <- c("SchoolID", "County", "School", "Race")
Riverside_admit
## # A tibble: 115 x 11
## SchoolID City County School Race adm app enr AdmitStat Campus
## <chr> <chr> <chr> <chr> <chr> <int> <int> <int> <dbl> <chr>
## 1 ALAMEDA … Alame… Alame… ALAME… All 55 72 10 0.764 River…
## 2 ALAMEDA … Alame… Alame… ALAME… Asian 38 51 8 0.745 River…
## 3 ALAMEDA … Alame… Alame… ALAME… White 9 11 NA 0.818 River…
## 4 ALAMEDA … Alame… Alame… ALAME… All 5 8 NA 0.625 River…
## 5 ALAMEDA … Alame… Alame… ALAME… Asian 3 5 NA 0.600 River…
## 6 ALBANY H… Albany Alame… ALBAN… All 17 23 4 0.739 River…
## 7 ALBANY H… Albany Alame… ALBAN… Asian 9 12 3 0.750 River…
## 8 ALBANY H… Albany Alame… ALBAN… White 4 6 NA 0.667 River…
## 9 AMADOR V… Pleas… Alame… AMADO… All 76 88 3 0.864 River…
## 10 AMADOR V… Pleas… Alame… AMADO… Asian 49 55 NA 0.891 River…
## # ... with 105 more rows, and 1 more variable: Year <dbl>
# Standardise the identifying columns by position:
# 1 = SchoolID, 3 = County, 4 = School, 5 = Race.
names(SanDiego_admit)[c(1, 3, 4, 5)] <- c("SchoolID", "County", "School", "Race")
SanDiego_admit
## # A tibble: 143 x 11
## SchoolID City County School Race adm app enr AdmitStat Campus
## <chr> <chr> <chr> <chr> <chr> <int> <int> <int> <dbl> <chr>
## 1 ALAMEDA … Alame… Alame… ALAME… All 5 12 NA 0.417 SanDi…
## 2 ALAMEDA … Alame… Alame… ALAME… Afri… NA 7 NA NA SanDi…
## 3 ALAMEDA … Alame… Alame… ALAME… All 53 148 7 0.358 SanDi…
## 4 ALAMEDA … Alame… Alame… ALAME… Asian 33 95 4 0.347 SanDi…
## 5 ALAMEDA … Alame… Alame… ALAME… Dome… NA 5 NA NA SanDi…
## 6 ALAMEDA … Alame… Alame… ALAME… Hisp… 3 8 NA 0.375 SanDi…
## 7 ALAMEDA … Alame… Alame… ALAME… White 14 29 NA 0.483 SanDi…
## 8 ALAMEDA … Alame… Alame… ALAME… All 5 28 NA 0.179 SanDi…
## 9 ALAMEDA … Alame… Alame… ALAME… Asian 4 21 NA 0.190 SanDi…
## 10 ALBANY H… Albany Alame… ALBAN… All 38 87 9 0.437 SanDi…
## # ... with 133 more rows, and 1 more variable: Year <dbl>
# Standardise the identifying columns by position:
# 1 = SchoolID, 3 = County, 4 = School, 5 = Race.
names(SantaBarb_admit)[c(1, 3, 4, 5)] <- c("SchoolID", "County", "School", "Race")
SantaBarb_admit
## # A tibble: 149 x 11
## SchoolID City County School Race adm app enr AdmitStat Campus
## <chr> <chr> <chr> <chr> <chr> <int> <int> <int> <dbl> <chr>
## 1 ALAMEDA … Alame… Alame… ALAME… All 3 13 NA 0.231 Santa…
## 2 ALAMEDA … Alame… Alame… ALAME… White NA 6 NA NA Santa…
## 3 ALAMEDA … Alame… Alame… ALAME… Afri… NA 8 NA NA Santa…
## 4 ALAMEDA … Alame… Alame… ALAME… All 47 144 9 0.326 Santa…
## 5 ALAMEDA … Alame… Alame… ALAME… Asian 25 84 4 0.298 Santa…
## 6 ALAMEDA … Alame… Alame… ALAME… Dome… NA 6 NA NA Santa…
## 7 ALAMEDA … Alame… Alame… ALAME… Hisp… 4 8 NA 0.500 Santa…
## 8 ALAMEDA … Alame… Alame… ALAME… White 13 34 5 0.382 Santa…
## 9 ALAMEDA … Alame… Alame… ALAME… All 5 21 NA 0.238 Santa…
## 10 ALAMEDA … Alame… Alame… ALAME… Asian 3 14 NA 0.214 Santa…
## # ... with 139 more rows, and 1 more variable: Year <dbl>
# Standardise the identifying columns by position:
# 1 = SchoolID, 3 = County, 4 = School, 5 = Race.
names(SantaCruz_admit)[c(1, 3, 4, 5)] <- c("SchoolID", "County", "School", "Race")
SantaCruz_admit
## # A tibble: 167 x 11
## SchoolID City County School Race adm app enr AdmitStat Campus
## <chr> <chr> <chr> <chr> <chr> <int> <int> <int> <dbl> <chr>
## 1 ALAMEDA … Alame… Alame… ALAME… All 9 18 NA 0.500 Santa…
## 2 ALAMEDA … Alame… Alame… ALAME… Asian NA 6 NA NA Santa…
## 3 ALAMEDA … Alame… Alame… ALAME… Afri… NA 6 NA NA Santa…
## 4 ALAMEDA … Alame… Alame… ALAME… All 74 133 18 0.556 Santa…
## 5 ALAMEDA … Alame… Alame… ALAME… Asian 43 80 9 0.538 Santa…
## 6 ALAMEDA … Alame… Alame… ALAME… Hisp… 4 9 NA 0.444 Santa…
## 7 ALAMEDA … Alame… Alame… ALAME… White 22 32 8 0.688 Santa…
## 8 ALAMEDA … Alame… Alame… ALAME… All 13 27 NA 0.481 Santa…
## 9 ALAMEDA … Alame… Alame… ALAME… Asian 9 17 NA 0.529 Santa…
## 10 ALBANY H… Albany Alame… ALBAN… All 69 106 9 0.651 Santa…
## # ... with 157 more rows, and 1 more variable: Year <dbl>
# Admit rate per school, coloured by race, one panel per city (two rows).
ggplot(All_campus, aes(x = School, y = AdmitStat, colour = Race)) +
  geom_point() +
  facet_wrap(~ City, nrow = 2) +
  labs(x = "2017", y = "All campus admissions")
## Warning: Removed 13 rows containing missing values (geom_point).

# Admit rate per school, coloured by race, one panel per city (two rows).
ggplot(Berkeley_admit, aes(x = School, y = AdmitStat, colour = Race)) +
  geom_point() +
  facet_wrap(~ City, nrow = 2) +
  labs(x = "2017", y = "Berkeley admissions")
## Warning: Removed 81 rows containing missing values (geom_point).

# Admit rate per school, coloured by race, one panel per city (two rows).
ggplot(Davis_admit, aes(x = School, y = AdmitStat, colour = Race)) +
  geom_point() +
  facet_wrap(~ City, nrow = 2) +
  labs(x = "2017", y = "Davis admissions")
## Warning: Removed 45 rows containing missing values (geom_point).

# Admit rate per school, coloured by race, one panel per city (two rows).
ggplot(Irvine_admit, aes(x = School, y = AdmitStat, colour = Race)) +
  geom_point() +
  facet_wrap(~ City, nrow = 2) +
  labs(x = "2017", y = "Irvine admissions")
## Warning: Removed 49 rows containing missing values (geom_point).

# Admit rate per school, coloured by race, one panel per city (two rows).
ggplot(LA_admit, aes(x = School, y = AdmitStat, colour = Race)) +
  geom_point() +
  facet_wrap(~ City, nrow = 2) +
  labs(x = "2017", y = "Los Angeles admissions")
## Warning: Removed 95 rows containing missing values (geom_point).

# Admit rate per school, coloured by race, one panel per city (two rows).
ggplot(Merced_admit, aes(x = School, y = AdmitStat, colour = Race)) +
  geom_point() +
  facet_wrap(~ City, nrow = 2) +
  labs(x = "2017", y = "Merced admissions")
## Warning: Removed 7 rows containing missing values (geom_point).

# Admit rate per school, coloured by race, one panel per city (two rows).
ggplot(Riverside_admit, aes(x = School, y = AdmitStat, colour = Race)) +
  geom_point() +
  facet_wrap(~ City, nrow = 2) +
  labs(x = "2017", y = "Riverside admissions")
## Warning: Removed 18 rows containing missing values (geom_point).

# Admit rate per school, coloured by race, one panel per city (two rows).
ggplot(SanDiego_admit, aes(x = School, y = AdmitStat, colour = Race)) +
  geom_point() +
  facet_wrap(~ City, nrow = 2) +
  labs(x = "2017", y = "San Diego admissions")
## Warning: Removed 49 rows containing missing values (geom_point).

# Admit rate per school, coloured by race, one panel per city (two rows).
ggplot(SantaBarb_admit, aes(x = School, y = AdmitStat, colour = Race)) +
  geom_point() +
  facet_wrap(~ City, nrow = 2) +
  labs(x = "2017", y = "SantaBarb admissions")
## Warning: Removed 48 rows containing missing values (geom_point).

# Admit rate per school, coloured by race, one panel per city (two rows).
ggplot(SantaCruz_admit, aes(x = School, y = AdmitStat, colour = Race)) +
  geom_point() +
  facet_wrap(~ City, nrow = 2) +
  labs(x = "2017", y = "SantaCruz admissions")
## Warning: Removed 26 rows containing missing values (geom_point).

# Fit one rpart tree per table, modelling admit rate on City and Race, and
# draw each tree after converting it to a party object.
tree_modAll <- rpart(
  formula = AdmitStat ~ City + Race,
  data = All_campus
)
plot(as.party(tree_modAll))

tree_modBerkeley <- rpart(
  formula = AdmitStat ~ City + Race,
  data = Berkeley_admit
)
plot(as.party(tree_modBerkeley))

tree_modDavis <- rpart(
  formula = AdmitStat ~ City + Race,
  data = Davis_admit
)
plot(as.party(tree_modDavis))

tree_modIrvine <- rpart(
  formula = AdmitStat ~ City + Race,
  data = Irvine_admit
)
plot(as.party(tree_modIrvine))

tree_modLA <- rpart(
  formula = AdmitStat ~ City + Race,
  data = LA_admit
)
plot(as.party(tree_modLA))

tree_modMerced <- rpart(
  formula = AdmitStat ~ City + Race,
  data = Merced_admit
)
plot(as.party(tree_modMerced))

tree_modRiverside <- rpart(
  formula = AdmitStat ~ City + Race,
  data = Riverside_admit
)
plot(as.party(tree_modRiverside))

tree_modSanDiego <- rpart(
  formula = AdmitStat ~ City + Race,
  data = SanDiego_admit
)
plot(as.party(tree_modSanDiego))

tree_modSantaBarb <- rpart(
  formula = AdmitStat ~ City + Race,
  data = SantaBarb_admit
)
plot(as.party(tree_modSantaBarb))

tree_modSantaCruz <- rpart(
  formula = AdmitStat ~ City + Race,
  data = SantaCruz_admit
)
plot(as.party(tree_modSantaCruz))

# Linear model of admit rate on City and Race for the Alameda rows of the
# all-campus table.
AdmittanceAll <- lm(
  formula = AdmitStat ~ City + Race,
  data = All_campus
)
# One row per model term: estimate, std.error, statistic, p.value.
tidy(AdmittanceAll)
## term estimate std.error statistic p.value
## 1 (Intercept) 0.58908165 0.03765383 15.6446674 6.966630e-35
## 2 CityAlbany -0.04364858 0.06180058 -0.7062811 4.809690e-01
## 3 CityBerkeley 0.03131487 0.05438830 0.5757648 5.655267e-01
## 4 CityCastro Valley -0.13008122 0.06180058 -2.1048544 3.675684e-02
## 5 CityDublin -0.05915651 0.05807523 -1.0186186 3.098147e-01
## 6 CityEmeryville -0.43761167 0.12665553 -3.4551328 6.927625e-04
## 7 CityFremont -0.06134431 0.03823063 -1.6045855 1.104196e-01
## 8 CityHayward -0.04176112 0.04077306 -1.0242332 3.071635e-01
## 9 CityLivermore -0.12347166 0.04679446 -2.6385958 9.089147e-03
## 10 CityNewark -0.20051806 0.06765016 -2.9640440 3.467069e-03
## 11 CityOakland -0.09221864 0.03304373 -2.7908062 5.851823e-03
## 12 CityPiedmont -0.07456624 0.06184689 -1.2056587 2.296051e-01
## 13 CityPleasanton -0.09781594 0.04862928 -2.0114621 4.583794e-02
## 14 CitySan Leandro -0.18882813 0.06180058 -3.0554427 2.605618e-03
## 15 CitySan Lorenzo -0.05534944 0.04545184 -1.2177603 2.249830e-01
## 16 CityUnion City -0.16070190 0.06180058 -2.6003300 1.012369e-02
## 17 RaceAll 0.12125729 0.03072684 3.9462993 1.154763e-04
## 18 RaceAsian 0.23301564 0.03338727 6.9791767 6.168482e-11
## 19 RaceDomestic Unknown 0.33997418 0.07810254 4.3529211 2.299817e-05
## 20 RaceHispanic/ Latino 0.06605839 0.03146694 2.0992949 3.725052e-02
## 21 RaceInter- national 0.36219765 0.07861341 4.6073263 7.913258e-06
## 22 RaceWhite 0.13474682 0.03661642 3.6799561 3.118570e-04
# Berkeley: linear model of AdmitStat on City and Race.
AdmittanceBerk <- lm(
  formula = AdmitStat ~ City + Race,
  data = Berkeley_admit
)
# Show the per-term coefficient table (estimate, std.error, statistic, p.value).
tidy(AdmittanceBerk)
## term estimate std.error statistic p.value
## 1 (Intercept) 0.30170196 0.04699590 6.4197501 1.020036e-08
## 2 CityAlbany -0.07676263 0.05302535 -1.4476590 1.517719e-01
## 3 CityBerkeley -0.09253793 0.04544192 -2.0364002 4.514851e-02
## 4 CityCastro Valley -0.17143770 0.05399522 -3.1750532 2.154705e-03
## 5 CityDublin -0.02424574 0.05525148 -0.4388251 6.620172e-01
## 6 CityFremont -0.10142922 0.03631176 -2.7932884 6.579716e-03
## 7 CityHayward -0.04666524 0.03892313 -1.1989077 2.342405e-01
## 8 CityLivermore -0.16983582 0.04772906 -3.5583318 6.429013e-04
## 9 CityNewark -0.20224649 0.08437664 -2.3969489 1.895709e-02
## 10 CityOakland -0.05127682 0.03183892 -1.6105074 1.113793e-01
## 11 CityPiedmont -0.11718747 0.05302535 -2.2100273 3.007367e-02
## 12 CityPleasanton -0.07416052 0.03915978 -1.8937931 6.200849e-02
## 13 CitySan Leandro -0.10454159 0.06240456 -1.6752235 9.794717e-02
## 14 CitySan Lorenzo -0.14728550 0.04815752 -3.0584110 3.060024e-03
## 15 CityUnion City -0.09270753 0.05302535 -1.7483625 8.438693e-02
## 16 RaceAll -0.02253240 0.03915223 -0.5755074 5.666263e-01
## 17 RaceAsian 0.01026809 0.04122732 0.2490605 8.039772e-01
## 18 RaceDomestic Unknown 0.07631029 0.05876042 1.2986681 1.979337e-01
## 19 RaceHispanic/ Latino -0.01022204 0.04329386 -0.2361084 8.139755e-01
## 20 RaceWhite 0.02830041 0.04679627 0.6047578 5.471180e-01
# Davis: linear model of AdmitStat on City and Race.
AdmittanceDavis <- lm(
  formula = AdmitStat ~ City + Race,
  data = Davis_admit
)
# Show the per-term coefficient table (estimate, std.error, statistic, p.value).
tidy(AdmittanceDavis)
## term estimate std.error statistic p.value
## 1 (Intercept) 0.436100047 0.06126792 7.1179183 9.792852e-11
## 2 CityAlbany 0.012192978 0.06752361 0.1805736 8.570174e-01
## 3 CityBerkeley 0.005691782 0.05662403 0.1005189 9.201059e-01
## 4 CityCastro Valley -0.131550946 0.06317815 -2.0822222 3.952088e-02
## 5 CityDublin -0.071224657 0.06991099 -1.0187906 3.104225e-01
## 6 CityFremont -0.128391850 0.04727786 -2.7156863 7.625403e-03
## 7 CityHayward -0.023336212 0.04568820 -0.5107711 6.104820e-01
## 8 CityLivermore -0.103765112 0.05367040 -1.9333770 5.562660e-02
## 9 CityNewark -0.129171703 0.06752361 -1.9129857 5.821507e-02
## 10 CityOakland -0.014355825 0.04076334 -0.3521749 7.253461e-01
## 11 CityPiedmont -0.139641731 0.07528275 -1.8548969 6.615017e-02
## 12 CityPleasanton -0.099163436 0.05290598 -1.8743331 6.340014e-02
## 13 CitySan Leandro -0.137009083 0.07554368 -1.8136405 7.231827e-02
## 14 CitySan Lorenzo -0.187802405 0.05400754 -3.4773370 7.137036e-04
## 15 CityUnion City -0.165657404 0.06752361 -2.4533256 1.564156e-02
## 16 RaceAll -0.026118890 0.05166091 -0.5055832 6.141086e-01
## 17 RaceAsian 0.019387707 0.05333977 0.3634756 7.169112e-01
## 18 RaceDomestic Unknown 0.188748337 0.08918513 2.1163655 3.645159e-02
## 19 RaceHispanic/ Latino -0.028420496 0.05363832 -0.5298543 5.972257e-01
## 20 RaceInter- national 0.277677596 0.09934186 2.7951721 6.072113e-03
## 21 RaceWhite 0.028666470 0.05851687 0.4898839 6.251413e-01
# Irvine: linear model of AdmitStat on City and Race.
AdmittanceIrvine <- lm(AdmitStat ~ City + Race, data = Irvine_admit)
# take a look at the features and coefficients
# NOTE: this step previously printed tidy(AdmittanceBerk) by mistake, so the
# recorded output that follows is a copy of the Berkeley table. Re-run this
# chunk to regenerate the output for the Irvine model.
tidy(AdmittanceIrvine)
## term estimate std.error statistic p.value
## 1 (Intercept) 0.30170196 0.04699590 6.4197501 1.020036e-08
## 2 CityAlbany -0.07676263 0.05302535 -1.4476590 1.517719e-01
## 3 CityBerkeley -0.09253793 0.04544192 -2.0364002 4.514851e-02
## 4 CityCastro Valley -0.17143770 0.05399522 -3.1750532 2.154705e-03
## 5 CityDublin -0.02424574 0.05525148 -0.4388251 6.620172e-01
## 6 CityFremont -0.10142922 0.03631176 -2.7932884 6.579716e-03
## 7 CityHayward -0.04666524 0.03892313 -1.1989077 2.342405e-01
## 8 CityLivermore -0.16983582 0.04772906 -3.5583318 6.429013e-04
## 9 CityNewark -0.20224649 0.08437664 -2.3969489 1.895709e-02
## 10 CityOakland -0.05127682 0.03183892 -1.6105074 1.113793e-01
## 11 CityPiedmont -0.11718747 0.05302535 -2.2100273 3.007367e-02
## 12 CityPleasanton -0.07416052 0.03915978 -1.8937931 6.200849e-02
## 13 CitySan Leandro -0.10454159 0.06240456 -1.6752235 9.794717e-02
## 14 CitySan Lorenzo -0.14728550 0.04815752 -3.0584110 3.060024e-03
## 15 CityUnion City -0.09270753 0.05302535 -1.7483625 8.438693e-02
## 16 RaceAll -0.02253240 0.03915223 -0.5755074 5.666263e-01
## 17 RaceAsian 0.01026809 0.04122732 0.2490605 8.039772e-01
## 18 RaceDomestic Unknown 0.07631029 0.05876042 1.2986681 1.979337e-01
## 19 RaceHispanic/ Latino -0.01022204 0.04329386 -0.2361084 8.139755e-01
## 20 RaceWhite 0.02830041 0.04679627 0.6047578 5.471180e-01
# Los Angeles: linear model of AdmitStat on City and Race.
AdmittanceLA <- lm(
  formula = AdmitStat ~ City + Race,
  data = LA_admit
)
# Show the per-term coefficient table (estimate, std.error, statistic, p.value).
tidy(AdmittanceLA)
## term estimate std.error statistic p.value
## 1 (Intercept) 0.155338019 0.08288356 1.87417168 0.06687533
## 2 CityAlbany 0.013148437 0.05496244 0.23922587 0.81192826
## 3 CityBerkeley -0.020939401 0.05496244 -0.38097658 0.70486747
## 4 CityCastro Valley -0.006319712 0.05271924 -0.11987488 0.90507242
## 5 CityDublin 0.016699906 0.05753768 0.29024295 0.77285519
## 6 CityFremont -0.004962854 0.04285261 -0.11581220 0.90827472
## 7 CityHayward 0.042436924 0.04875717 0.87037300 0.38834068
## 8 CityLivermore 0.016176491 0.04835639 0.33452645 0.73941007
## 9 CityNewark -0.061777550 0.08092268 -0.76341449 0.44887641
## 10 CityOakland 0.058947695 0.04135031 1.42556823 0.16033256
## 11 CityPiedmont 0.001397573 0.05496244 0.02542778 0.97981702
## 12 CityPleasanton -0.012686211 0.04828042 -0.26276101 0.79383646
## 13 CitySan Leandro 0.037066677 0.06264863 0.59165981 0.55679844
## 14 CitySan Lorenzo -0.050225258 0.05527657 -0.90861749 0.36799954
## 15 CityUnion City 0.009740954 0.06264863 0.15548551 0.87707735
## 16 RaceAll -0.007846184 0.07511015 -0.10446236 0.91722887
## 17 RaceAsian -0.001350464 0.07612039 -0.01774115 0.98591739
## 18 RaceDomestic Unknown 0.117587065 0.08843929 1.32957945 0.18981372
## 19 RaceHispanic/ Latino 0.019328381 0.09049700 0.21358034 0.83176054
## 20 RaceWhite 0.007268717 0.07794905 0.09324960 0.92608552
# Merced: linear model of AdmitStat on City and Race.
AdmittanceMerced <- lm(
  formula = AdmitStat ~ City + Race,
  data = Merced_admit
)
# Show the per-term coefficient table (estimate, std.error, statistic, p.value).
tidy(AdmittanceMerced)
## term estimate std.error statistic p.value
## 1 (Intercept) 0.928250569 0.07768897 11.94829315 2.038593e-20
## 2 CityAlbany -0.040288380 0.09548715 -0.42192464 6.740643e-01
## 3 CityBerkeley -0.018618669 0.07117581 -0.26158702 7.942246e-01
## 4 CityCastro Valley -0.061636086 0.08248297 -0.74725830 4.568133e-01
## 5 CityDublin -0.196820135 0.08248297 -2.38619111 1.907240e-02
## 6 CityFremont -0.011046024 0.05905941 -0.18703242 8.520471e-01
## 7 CityHayward -0.168428324 0.05755990 -2.92614018 4.322941e-03
## 8 CityLivermore -0.063570795 0.06954650 -0.91407608 3.630663e-01
## 9 CityNewark -0.181955047 0.09548715 -1.90554492 5.983197e-02
## 10 CityOakland -0.185199864 0.05079990 -3.64567384 4.414968e-04
## 11 CityPiedmont -0.038262102 0.08248297 -0.46387880 6.438303e-01
## 12 CityPleasanton -0.027939846 0.06404660 -0.43624246 6.636828e-01
## 13 CitySan Leandro -0.152972444 0.08278217 -1.84789117 6.783113e-02
## 14 CitySan Lorenzo -0.041431090 0.06229190 -0.66511196 5.076428e-01
## 15 CityUnion City -0.175293873 0.08278217 -2.11753170 3.691214e-02
## 16 RaceAll -0.072495955 0.06395157 -1.13360715 2.599048e-01
## 17 RaceAsian -0.003428424 0.06683122 -0.05129973 9.591979e-01
## 18 RaceDomestic Unknown 0.082795455 0.11182437 0.74040620 4.609390e-01
## 19 RaceHispanic/ Latino -0.128382218 0.06554627 -1.95865011 5.318234e-02
## 20 RaceWhite -0.004755309 0.07817703 -0.06082744 9.516286e-01
# Riverside: linear model of AdmitStat on City and Race.
AdmittanceRiverside <- lm(
  formula = AdmitStat ~ City + Race,
  data = Riverside_admit
)
# Show the per-term coefficient table (estimate, std.error, statistic, p.value).
tidy(AdmittanceRiverside)
## term estimate std.error statistic p.value
## 1 (Intercept) 0.68614363 0.09055040 7.5774775 6.594168e-11
## 2 CityAlbany -0.02395866 0.09281323 -0.2581384 7.969888e-01
## 3 CityBerkeley -0.14328209 0.08063243 -1.7769785 7.952109e-02
## 4 CityCastro Valley -0.05328411 0.08627350 -0.6176185 5.386492e-01
## 5 CityDublin 0.08427923 0.09281323 0.9080519 3.666841e-01
## 6 CityFremont -0.01408112 0.06174472 -0.2280538 8.202090e-01
## 7 CityHayward -0.16686131 0.06530323 -2.5551771 1.258479e-02
## 8 CityLivermore -0.08058540 0.07515838 -1.0722078 2.869757e-01
## 9 CityNewark -0.16160154 0.10995345 -1.4697269 1.457100e-01
## 10 CityOakland -0.30546094 0.06261875 -4.8781069 5.646804e-06
## 11 CityPiedmont 0.13139189 0.09281323 1.4156590 1.609075e-01
## 12 CityPleasanton 0.04670401 0.07061709 0.6613698 5.103494e-01
## 13 CitySan Leandro -0.32898717 0.10757104 -3.0583246 3.060810e-03
## 14 CitySan Lorenzo -0.30118592 0.07626552 -3.9491753 1.722958e-04
## 15 CityUnion City -0.23461552 0.09352685 -2.5085366 1.422558e-02
## 16 RaceAll 0.02292727 0.07653816 0.2995534 7.653249e-01
## 17 RaceAsian 0.09609314 0.07854389 1.2234324 2.248973e-01
## 18 RaceDomestic Unknown 0.30767578 0.11138828 2.7621917 7.176565e-03
## 19 RaceHispanic/ Latino -0.02756700 0.07958587 -0.3463805 7.300015e-01
## 20 RaceWhite 0.05022177 0.08393068 0.5983720 5.513479e-01
# San Diego: linear model of AdmitStat on City and Race.
AdmittanceSanDiego <- lm(
  formula = AdmitStat ~ City + Race,
  data = SanDiego_admit
)
# Show the per-term coefficient table (estimate, std.error, statistic, p.value).
tidy(AdmittanceSanDiego)
## term estimate std.error statistic p.value
## 1 (Intercept) 0.348812843 0.08118680 4.29642324 5.207291e-05
## 2 CityAlbany 0.071694117 0.07813186 0.91760416 3.618072e-01
## 3 CityBerkeley -0.097737566 0.06681538 -1.46280047 1.477564e-01
## 4 CityCastro Valley -0.088278127 0.07042032 -1.25358880 2.139376e-01
## 5 CityDublin -0.155540428 0.07813186 -1.99074274 5.020209e-02
## 6 CityFremont -0.082676158 0.05330210 -1.55108622 1.251476e-01
## 7 CityHayward 0.002984948 0.05184676 0.05757251 9.542443e-01
## 8 CityLivermore -0.134591206 0.06206173 -2.16866661 3.332198e-02
## 9 CityNewark -0.108630720 0.09117640 -1.19143468 2.372903e-01
## 10 CityOakland -0.023114482 0.04769992 -0.48458115 6.294048e-01
## 11 CityPiedmont -0.121263043 0.07813186 -1.55203072 1.249215e-01
## 12 CityPleasanton -0.082466339 0.06058815 -1.36109681 1.776137e-01
## 13 CitySan Leandro -0.088017999 0.09117640 -0.96535949 3.375091e-01
## 14 CitySan Lorenzo 0.001177797 0.06147418 0.01915922 9.847657e-01
## 15 CityUnion City -0.041678374 0.07813186 -0.53343637 5.953302e-01
## 16 RaceAll 0.023805554 0.07288298 0.32662706 7.448720e-01
## 17 RaceAsian 0.070189174 0.07368340 0.95257787 3.439052e-01
## 18 RaceDomestic Unknown 0.206172199 0.11316300 1.82190471 7.251028e-02
## 19 RaceHispanic/ Latino 0.018689222 0.08032452 0.23267145 8.166589e-01
## 20 RaceWhite 0.062932667 0.07979894 0.78864038 4.328415e-01
# Santa Barbara: linear model of AdmitStat on City and Race.
AdmittanceSantaBarb <- lm(
  formula = AdmitStat ~ City + Race,
  data = SantaBarb_admit
)
# Show the per-term coefficient table (estimate, std.error, statistic, p.value).
tidy(AdmittanceSantaBarb)
## term estimate std.error statistic p.value
## 1 (Intercept) 0.457249368 0.06593913 6.9344159 9.023858e-10
## 2 CityAlbany 0.049374434 0.07472863 0.6607164 5.106690e-01
## 3 CityBerkeley -0.056636379 0.06323152 -0.8956985 3.730679e-01
## 4 CityCastro Valley -0.115004109 0.06323152 -1.8187782 7.264117e-02
## 5 CityDublin -0.079647493 0.06848780 -1.1629443 2.482668e-01
## 6 CityFremont -0.022778571 0.04724560 -0.4821310 6.310131e-01
## 7 CityHayward -0.024272041 0.05048481 -0.4807791 6.319696e-01
## 8 CityLivermore -0.005568372 0.05220010 -0.1066736 9.153117e-01
## 9 CityNewark -0.025882784 0.08807543 -0.2938706 7.696084e-01
## 10 CityOakland -0.073082757 0.04735976 -1.5431405 1.266947e-01
## 11 CityPiedmont -0.066509410 0.07472863 -0.8900124 3.760950e-01
## 12 CityPleasanton -0.029968514 0.05277722 -0.5678305 5.717207e-01
## 13 CitySan Leandro -0.051428940 0.07525654 -0.6833817 4.963159e-01
## 14 CitySan Lorenzo -0.075843231 0.06749297 -1.1237204 2.644510e-01
## 15 CityUnion City -0.048391514 0.07472863 -0.6475632 5.190987e-01
## 16 RaceAll -0.111377551 0.05755343 -1.9352027 5.645539e-02
## 17 RaceAsian -0.055059321 0.05814849 -0.9468745 3.465190e-01
## 18 RaceDomestic Unknown 0.068667015 0.08324211 0.8249072 4.118470e-01
## 19 RaceHispanic/ Latino -0.113524411 0.06428791 -1.7658751 8.118495e-02
## 20 RaceWhite -0.070410663 0.06135625 -1.1475711 2.545239e-01
# Santa Cruz: linear model of AdmitStat on City and Race.
AdmittanceSantaCruz <- lm(
  formula = AdmitStat ~ City + Race,
  data = SantaCruz_admit
)
# Show the per-term coefficient table (estimate, std.error, statistic, p.value).
tidy(AdmittanceSantaCruz)
## term estimate std.error statistic p.value
## 1 (Intercept) 0.589985725 0.09165207 6.4372333 2.615254e-09
## 2 CityAlbany -0.011885319 0.10133643 -0.1172857 9.068297e-01
## 3 CityBerkeley 0.139507557 0.08482305 1.6446892 1.026511e-01
## 4 CityCastro Valley -0.149847332 0.10133643 -1.4787113 1.418377e-01
## 5 CityDublin -0.089534350 0.11299597 -0.7923676 4.297100e-01
## 6 CityFremont -0.095179847 0.06823796 -1.3948225 1.656459e-01
## 7 CityHayward -0.101494194 0.07113650 -1.4267526 1.562479e-01
## 8 CityLivermore -0.115521248 0.08049385 -1.4351562 1.538439e-01
## 9 CityNewark -0.232994075 0.10133643 -2.2992133 2.322204e-02
## 10 CityOakland -0.084864813 0.05990685 -1.4166129 1.591869e-01
## 11 CityPiedmont -0.138071141 0.11299597 -1.2219121 2.241358e-01
## 12 CityPleasanton -0.093757794 0.08172427 -1.1472454 2.535624e-01
## 13 CitySan Leandro -0.116385123 0.11330381 -1.0271951 3.063947e-01
## 14 CitySan Lorenzo -0.049459556 0.08101657 -0.6104869 5.426938e-01
## 15 CityUnion City -0.156494807 0.10133643 -1.5443094 1.251465e-01
## 16 RaceAll -0.001426157 0.07712750 -0.0184909 9.852779e-01
## 17 RaceAsian 0.104907978 0.08026846 1.3069639 1.937240e-01
## 18 RaceDomestic Unknown 0.230497613 0.13312320 1.7314609 8.593958e-02
## 19 RaceHispanic/ Latino -0.064958715 0.07909208 -0.8213049 4.131013e-01
## 20 RaceInter- national 0.505194122 0.15026000 3.3621332 1.038191e-03
## 21 RaceWhite 0.119031531 0.08579450 1.3874027 1.678902e-01